*****************************************************************************
*------------------------**------------------------------**------------------
*       Program Purpose: Prepare Dataset for Kenya Resilience Study 
*							for Technical Consortium						*
*							

*                              WOMAN REGRESSIONS                            *
* 					 Sara Signorelli , November 9th 2015
*------------------------**------------------------------**------------------
******************************************************************************


****************** Session settings and directories
* Switch off output paging so the do-file runs without pausing.
set more off

* Folder that holds the women-level input file; it is also used later as the
* working directory for every exported table.
global out  "D:\Users\SSIGNORELLI\Dropbox (IFPRI)\AR research\Kenya_Resilience\out\Women"


***********************************************
********** WOMEN LEVEL ANALYSIS ***************
***********************************************

* Load the women-level file, replacing whatever is currently in memory.
use "$out\KEN_DHS_Women.dta", clear


***** Controls
* Rural indicator: 1 when urbrur equals 2, 0 otherwise (a missing urbrur also gives 0).
generate rural = (urbrur == 2)

* Turn code 2 of w_married into 0; every other value is left untouched.
recode w_married (2 = 0)

* Not-working indicator.
*   - year 1993: flags a system-missing w_occupation
*   - later years (and a missing year): flags w_occupation equal to 0
*   - years before 1993: left missing
generate w_notworking = cond(year == 1993, w_occupation == ., w_occupation == 0) if year >= 1993

* Agricultural work (occupation codes 4 or 5); missing when w_occupation is
* the system missing value.
generate w_agriwork = inlist(w_occupation, 4, 5) if w_occupation != .

* Household head is coded 2 on sex_head (a missing sex_head gives 0).
generate fem_head = (sex_head == 2)

***** Year and region fixed effects
* One dummy per region among observations with dry_land>0 (regdry1, regdry2, ...),
* one per region over the full sample (regall...), and one per survey year (yrd...).
* NOTE(review): in Stata a missing dry_land also satisfies dry_land>0 - confirm
* dry_land has no missing values.
tabulate regcode if dry_land > 0, generate(regdry)
tabulate regcode, generate(regall)
tabulate year, generate(yrd)

* Integer version of the individual weight: weight times 100, rounded.
generate temp = indweight * 100
generate i_weight = round(temp)
drop temp

**** Outcome variables
* Short names for the anthropometric outcomes used throughout the analysis.
rename (iw_v445_underweight_adj iw_v445_overweight_adj iw_v445_obese_adj iw_v445_bmi) ///
       (underweight             overweight             obese             bmi)

** CONSTRUCTION OF PSEUDO PANEL USING PSMATCHING
* For every sample (dry: dry_land>0, all: every observation), every binary
* outcome and every survey-year dummy (yrd1 to yrd4), a probit of the outcome
* on the controls and region dummies is estimated. The predicted probability
* is cut into five groups with xtile, and the group number becomes the
* pseudo-panel identifier pp_grp_<outcome>_<sample>.
global pp_controls rural w_edu w_age w_married n_children w_notworking w_agriwork ih_hv207_radio 
global pp_outcomes underweight overweight obese

* Constant that lets the "all" sample reuse the same <sample>_land>0 condition
* as the "dry" sample.
gen all_land=1


* Work inside the output folder and remove tables left by a previous run;
* capture keeps the script going when a file does not exist.
cd "$out"
foreach file in probit_ppanel_dry probit_ppanel_all {
    cap rm `file'.xml
    cap rm `file'.txt
}


* Step 1: year-specific probits and the five-group split of their predictions.
foreach cond in dry all {
    foreach yvar in $pp_outcomes {
        forval y=1/4 {
            probit `yvar' $pp_controls reg`cond'* if yrd`y'==1 & `cond'_land>0 [pw=indweight], cluster(cluster)
            * Column title is the outcome name. The loop macro here is yvar;
            * the previously used macro var is not defined in this loop and
            * produced an empty column title.
            outreg2 using probit_ppanel_`cond',  ctitle("`yvar'") excel label
            * NOTE(review): predict carries no if-condition, so predictions (and
            * therefore the xtile cut-points) are computed for every observation
            * with non-missing covariates, not only for the survey year being
            * estimated. Confirm that pooled cut-points are intended.
            predict `yvar'_`cond'_`y'
            xtile `yvar'_`cond'_`y'_q=`yvar'_`cond'_`y', n(5)
            drop `yvar'_`cond'_`y'
        }
    }
}


* Step 2: stack the year-specific groups into one identifier per outcome and
* sample, keeping for each observation the group from its own survey year.
foreach cond in dry all {
    gen pp_grp_underwgt_`cond'=.
    gen pp_grp_overwgt_`cond'=.
    gen pp_grp_obese_`cond'=.

    forval y=1/4 {
        replace pp_grp_underwgt_`cond'= underweight_`cond'_`y'_q        if yrd`y'==1 & `cond'_land>0
        replace pp_grp_overwgt_`cond'=  overweight_`cond'_`y'_q         if yrd`y'==1 & `cond'_land>0
        replace pp_grp_obese_`cond'=    obese_`cond'_`y'_q              if yrd`y'==1 & `cond'_land>0
        drop underweight_`cond'_`y'_q  overweight_`cond'_`y'_q obese_`cond'_`y'_q
    }
}

** CONSTRUCTION OF PSEUDO PANEL USING REG on BMI
* Same grouping logic as for the binary outcomes, but driven by a linear
* regression of bmi: for each sample and survey-year dummy the fitted value is
* split into five groups, and the group of the observation's own survey year
* is stored in pp_grp_bmi_<sample>. Results are appended to the
* probit_ppanel_<sample> tables.
foreach sample in dry all {
    generate pp_grp_bmi_`sample' = .

    forvalues round = 1/4 {
        regress bmi $pp_controls reg`sample'* if yrd`round'==1 & `sample'_land>0 [pw=indweight], cluster(cluster)
        outreg2 using probit_ppanel_`sample',  ctitle("BMI") excel label

        * Fitted value and its five-group split (working variables only).
        predict bmi_`sample'_`round'
        xtile bmi_`sample'_`round'_q = bmi_`sample'_`round', n(5)

        * Keep the group only for observations of this survey year and sample.
        replace pp_grp_bmi_`sample' = bmi_`sample'_`round'_q if yrd`round'==1 & `sample'_land>0

        drop bmi_`sample'_`round' bmi_`sample'_`round'_q
    }
}


** BIO VARIABLES
*******************************************
* Start with empty variables; they are filled survey year by survey year below.
foreach biovar in temperature Ltemperature temp_season rainfall Lrainfall  PDSI NDVI {
    generate `biovar' = .
}

* Each survey year (1993, 1998, 2003, 2008) takes the value recorded one year
* earlier; the variables prefixed with L take the value from two years earlier.
foreach round of numlist 1993(5)2008 {
    local prev  = `round' - 1
    local prev2 = `round' - 2

    replace temperature  = bio1_`prev'    if year == `round'
    replace Ltemperature = bio1_`prev2'   if year == `round'
    replace temp_season  = bio4_`prev'    if year == `round'
    replace rainfall     = bio12_`prev'   if year == `round'
    replace Lrainfall    = bio12_`prev2'  if year == `round'
    replace PDSI         = pdsi`prev'     if year == `round'
    replace NDVI         = NDVI_`prev'    if year == `round'
}

* Drought is the PDSI with its sign flipped; NDVI is rescaled by 10000.
generate drought = -PDSI
replace NDVI = NDVI/10000

* Variable labels (picked up by outreg2 through its label option).

* Outcomes
label variable underweight  "Woman is underweight"
label variable overweight   "Woman is overweight"
label variable obese        "Woman is obese"
label variable bmi          "Woman bmi"

* Survey-year dummies yrd1 to yrd4
local k = 0
foreach yr in 1993 1998 2003 2008 {
    local ++k
    label variable yrd`k' "year `yr'"
}

* Region dummies for the dry-land sample.
* NOTE(review): assumes the sort order of regcode among dry-land observations
* matches these four names - confirm against the value labels of regcode.
label variable regdry1 "Coast Region"
label variable regdry2 "Eastern Region"
label variable regdry3 "Rift Valley Region"
label variable regdry4 "North Eastern Region"

* Climate and soil variables
label variable rainfall     "Mean annual rainfall"
label variable Lrainfall    "Lagged mean annual rainfall"
label variable temperature  "Mean annual temperature"
label variable Ltemperature "Lagged mean annual temperature"
label variable NDVI         "NDVI"
label variable soc_total    "Soil organic carbon total (permiles)"

* Pseudo-panel group identifiers, for the dry and the all samples
foreach sample in dry all {
    label variable pp_grp_underwgt_`sample' "Pseudo-panel group for `sample' areas: underweight"
    label variable pp_grp_overwgt_`sample'  "Pseudo-panel group for `sample' areas: overweight"
    label variable pp_grp_obese_`sample'    "Pseudo-panel group for `sample' areas: obese"
    label variable pp_grp_bmi_`sample'      "Pseudo-panel group for `sample' areas: bmi"
}
  

* Arid-area indicator: 1 when dry_land is above zero.
* NOTE(review): a missing dry_land also evaluates as above zero in Stata -
* confirm dry_land has no missing values.
generate arid_area = (dry_land > 0)

** gen pseudo-panel groups
* One dummy per pseudo-panel group, for each outcome and each sample.
foreach sample in dry all {
    tabulate pp_grp_underwgt_`sample', generate(punwgt_`sample')
    tabulate pp_grp_overwgt_`sample',  generate(povwgt_`sample')
    tabulate pp_grp_obese_`sample',    generate(pobe_`sample')
    tabulate pp_grp_bmi_`sample',      generate(pbmi_`sample')
}

** summary statistics
* Weighted means by region and year, exported to Excel; the working data are
* restored afterwards.
preserve
collapse (mean) underweight bmi temperature drought [pw=indweight], by(regname year)
order year, after(regname)
export excel using "$out\SumStat.xls", firstrow(varlabels) replace
restore



****** regression variables *****************************
************************************************

* Outcome k (1 underweight, 2 overweight, 3 obese) gets three globals:
*   anthro<k>        the outcome variable
*   control<k>_dry   pseudo-panel group dummies 2 to 5 for the dry sample
*   control<k>_all   arid_area plus group dummies 2 to 5 for the full sample
local outcomes underweight overweight obese
local stubs    punwgt      povwgt     pobe
forvalues k = 1/3 {
    local outcome : word `k' of `outcomes'
    local stub    : word `k' of `stubs'

    global anthro`k'      `outcome'
    global control`k'_dry `stub'_dry2-`stub'_dry5
    global control`k'_all arid_area `stub'_all2-`stub'_all5
}

** BMI
global bmi bmi
global control_bmi_dry pbmi_dry2-pbmi_dry5
global control_bmi_all arid_area pbmi_all2-pbmi_all5

** bio variables
global bio1 temperature drought NDVI
* NOTE(review): rain_season is not created anywhere in the visible part of this
* file and bio2 is not used below - confirm before relying on this list.
global bio2 temperature temp_season rainfall rain_season

** Resilience interactions
* nonagri_job is 1 when the woman or her partner reports occupation code
* 1, 2, 3, 7, 8 or 9. It is missing only when both occupation variables are
* system-missing.
generate nonagri_job = (inlist(w_occupation, 1, 2, 3, 7, 8, 9) | inlist(occupation_partner, 1, 2, 3, 7, 8, 9)) ///
    if !(occupation_partner == . & w_occupation == .)

* Climate variables interacted with the non-agricultural job indicator
generate tempXnagri    = temperature * nonagri_job
generate DroughtXnagri = drought * nonagri_job
generate RainXnagri    = rainfall * nonagri_job

* Climate variables interacted with the woman's education
generate tempXedu    = temperature * w_edu
generate DroughtXedu = drought * w_edu
generate RainXedu    = rainfall * w_edu

* Regressor sets for the two resilience specifications (the rainfall
* interactions are created above but are not part of either set).
global resilience1 tempXnagri  DroughtXnagri  nonagri_job
global resilience2 tempXedu DroughtXedu w_edu

***********************************************************
***********************************************************



** Simulations **
* Shocked versions of the climate variables: drought scaled up by 50 percent,
* temperature raised by 2 units, NDVI cut by half.
generate drought_sim     = 1.5 * drought
generate temperature_sim = temperature + 2
generate ndvi_sim        = 0.5 * NDVI


*************************************************************
********** REGRESSION ANALYSIS: ANTHROPOMETRICS *************
*************************************************************


cd "$out"
set more off

* Remove result tables left by a previous run; capture keeps the script going
* when a file does not exist.
foreach stem in probit_res_dry probit_res_all reg_res_dry reg_res_all {
    capture rm `stem'.xml
    capture rm `stem'.txt
}

*** PROBITS FOR UNDERWEIGHT, OVERWEIGHT AND OBESE, WITH SHOCK SIMULATIONS ***
*****************************************************************************
* Variables that are shocked in each simulation scenario. The scenario name is
* also the suffix of the prediction variable (hat_sim<scenario>).
local shock_temp   temperature
local shock_dr     drought
local shock_ndvi   NDVI
local shock_temdr  drought temperature
local shock_all    drought temperature NDVI

foreach cond in dry all {
    forvalues i = 1/3 {
        foreach outcome in ${anthro`i'} {

            ** 1) Without Resilience controls
            probit `outcome' ${control`i'_`cond'} $bio1 yrd2-yrd4  if `cond'_land>0 [pw=indweight],  cluster(cluster)
            outreg2 using probit_res_`cond',  ctitle("`outcome'") excel label

            * Baseline prediction with the observed climate values
            predict `outcome'_hat_`cond'

            * Scenarios, in order: temperature shock, drought shock, NDVI shock,
            * drought plus temperature, and all three together.
            foreach scen in temp dr ndvi temdr all {

                * Back up every shocked variable, then overwrite it with its
                * simulated counterpart (temperature_sim, drought_sim, ndvi_sim).
                foreach v of local shock_`scen' {
                    local simvar = lower("`v'") + "_sim"
                    generate `v'_temp = `v'
                    replace `v' = `simvar'
                }

                * Prediction from the same probit under the shocked values
                predict `outcome'_hat_sim`scen'_`cond'

                * Put the observed values back and discard the backups
                foreach v of local shock_`scen' {
                    replace `v' = `v'_temp
                    drop `v'_temp
                }
            }
        }
    }
}


* Probits with the resilience interactions added. For each sample and outcome
* two specifications are appended to probit_res_<sample>: first the
* non-agricultural job set (resilience1), then the education set (resilience2).
foreach cond in dry all {
    forvalues i = 1/3 {
        foreach outcome in ${anthro`i'} {

            ** 2) With Resilience controls
            forvalues spec = 1/2 {
                * Second part of the column title for this specification
                if `spec' == 1 {
                    local tag "nonagri job"
                }
                else {
                    local tag "edu"
                }

                probit `outcome' ${control`i'_`cond'} ${resilience`spec'} $bio1 yrd2-yrd4 if `cond'_land>0 [pw=indweight],  cluster(cluster)
                outreg2 using probit_res_`cond',  ctitle("`outcome'", `tag') excel label
            }
        }
    }
}


*** REGRESSIONS ON BMI **
*****************************
* For each sample, an OLS regression of bmi followed by quantile regressions at
* the 25th, 50th and 75th percentiles, all appended to reg_res_<sample>.

foreach cond in dry all {
    ** 1) Without resilience controls
    regress bmi ${control_bmi_`cond'} $bio1 yrd2-yrd4 if `cond'_land>0 [pw=indweight],  cluster(cluster)
    outreg2 using reg_res_`cond',  ctitle("bmi") excel label

    foreach p in 25 50 75 {
        qreg bmi ${control_bmi_`cond'} $bio1 yrd2-yrd4 if `cond'_land>0 [pw=indweight], vce(robust) quantile(0.`p')
        outreg2 using reg_res_`cond',    ctitle("bmi", p`p') excel label
    }
}


foreach cond in dry all {
    ** 2) With resilience controls
    **    spec 1 (2a): non agri job interactions
    **    spec 2 (2b): education interactions
    forvalues spec = 1/2 {
        * Second part of the column title for this specification
        if `spec' == 1 {
            local tag "non agri job"
        }
        else {
            local tag "edu"
        }

        regress bmi ${control_bmi_`cond'} ${resilience`spec'} $bio1 yrd2-yrd4 if `cond'_land>0 [pw=indweight],  cluster(cluster)
        outreg2 using reg_res_`cond',  ctitle("bmi", `tag') excel label

        foreach p in 25 50 75 {
            qreg bmi ${control_bmi_`cond'} ${resilience`spec'} $bio1 yrd2-yrd4 if `cond'_land>0 [pw=indweight], vce(robust) quantile(0.`p')
            outreg2 using reg_res_`cond',    ctitle("bmi", `tag' p`p') excel label
        }
    }
}


** Simulations Results **
*************************
* For each sample and outcome, tabulate by year the observed outcome, the
* baseline prediction and the five simulated predictions.

foreach cond in dry all {
    foreach outcome in underweight overweight obese {
        * Columns: observed value, baseline prediction, then one per scenario
        local cols `outcome' `outcome'_hat_`cond'
        foreach scen in simtemp simdr simndvi simtemdr simall {
            local cols `cols' `outcome'_hat_`scen'_`cond'
        }
        tabstat `cols' if `cond'_land>0, by(year)
    }
}


